############### Castle and Pelc 2019: Leaks ISQ###############
############################################################## 

# Date: March 2019
# Authors: Matthew Castle and Krzysztof Pelc
# Output: Quantitative text analysis replication for Castle and Pelc, "The Causes and Effects of Leaks in International Negotiations", ISQ.

# clear
# Removes every object from the global environment so the script starts from
# a clean workspace. This does not detach packages or reset options.
rm(list=ls())

# packages
library(quanteda)
library(readtext)
library(SnowballC)
library(tm)
library(tidytext)
library(tidyr)
library(methods)

# import data
# Loads the saved workspace objects from the working directory.
# NOTE(review): presumably this file provides `leak_cov_corpus` and
# `release_cov_corpus`, which are used below but never created in this
# script -- confirm against the .RData contents.
load("CaPe_ISQ_leaks.RData")

# set seed
# The same seed is set again immediately before each bootstrap run below.
set.seed(12345)

# detach quanteda (attached above) so that it can be re-attached last
detach("package:quanteda", unload=TRUE)

# use required packages
# Attach order matters: a package attached later masks same-named functions
# from packages attached earlier. plyr is attached before dplyr, and quanteda
# is re-attached last so that its functions sit in front of all the others.
library(lubridate)
library(stringr)
library(tidytext)
library(plyr)
library(dplyr)
library(broom)
library(scales)
library(twitteR)
library(wordcloud)
library(reshape2)
library(quanteda)

## Functions that compute the sentiment of resampled 30-sentence articles, with bootstraps

# leak programme
# Bootstrap the net Bing sentiment of leak coverage.
#
# The corpus is split into sentences once. Then, `nboot` times, `nsent`
# sentences are drawn with replacement, pasted together into one sample text,
# and scored as (number of Bing-positive tokens) - (number of Bing-negative
# tokens).
#
# Args:
#   document: quanteda corpus to resample. Defaults to the first document of
#             the global `leak_cov_corpus`.
#   nboot:    number of bootstrap resamples.
#   nsent:    number of sentences drawn per resample.
#
# Returns:
#   Numeric vector of length `nboot` holding one net sentiment score per
#   resample. A progress line is written to the console after each resample.
boot_leak_hist <- function(document=leak_cov_corpus[1],nboot=200,nsent=30){

  # Sentence segmentation and the lexicon lookup do not depend on the
  # resample, so they are done once instead of on every iteration.
  leak_sentence_vector <- texts(corpus_reshape(document, "sentences"))
  if (length(leak_sentence_vector) == 0) {
    stop("`document` contains no sentences to resample", call. = FALSE)
  }
  bing_lexicon <- get_sentiments("bing")

  # Preallocate the result rather than growing it inside the loop.
  sentiment_list <- numeric(nboot)

  # seq_len() yields an empty sequence for nboot = 0, whereas 1:nboot would
  # iterate over c(1, 0).
  for (i in seq_len(nboot)) {

    leak_sample <- paste(
      sample(leak_sentence_vector, size = nsent, replace = TRUE),
      collapse = " "
    )

    samp_sentiments <- tidy(dfm(leak_sample)) %>%
      inner_join(bing_lexicon, by = c(term = "word"))

    # Sum the token counts per polarity directly. A resample that contains
    # only positive, only negative, or no lexicon words then scores the
    # missing polarity as 0 instead of failing on an absent column.
    is_positive <- samp_sentiments$sentiment == "positive"
    is_negative <- samp_sentiments$sentiment == "negative"
    sentiment_list[i] <- sum(samp_sentiments$count[is_positive]) -
      sum(samp_sentiments$count[is_negative])

    cat("done",i,"of",nboot,"resamples\n")
  }

  sentiment_list
}

# official release programme

# Bootstrap the net Bing sentiment of official-release coverage.
#
# The corpus is split into sentences once. Then, `nboot` times, `nsent`
# sentences are drawn with replacement, pasted together into one sample text,
# and scored as (number of Bing-positive tokens) - (number of Bing-negative
# tokens).
#
# Args:
#   document: quanteda corpus to resample. Defaults to the first document of
#             the global `release_cov_corpus`.
#   nboot:    number of bootstrap resamples.
#   nsent:    number of sentences drawn per resample.
#
# Returns:
#   Numeric vector of length `nboot` holding one net sentiment score per
#   resample. A progress line is written to the console after each resample.
boot_release_hist <- function(document=release_cov_corpus[1],nboot=200,nsent=30){

  # Sentence segmentation and the lexicon lookup do not depend on the
  # resample, so they are done once instead of on every iteration.
  release_sentence_vector <- texts(corpus_reshape(document, "sentences"))
  if (length(release_sentence_vector) == 0) {
    stop("`document` contains no sentences to resample", call. = FALSE)
  }
  bing_lexicon <- get_sentiments("bing")

  # Preallocate the result rather than growing it inside the loop.
  sentiment_list <- numeric(nboot)

  # seq_len() yields an empty sequence for nboot = 0, whereas 1:nboot would
  # iterate over c(1, 0).
  for (i in seq_len(nboot)) {

    release_sample <- paste(
      sample(release_sentence_vector, size = nsent, replace = TRUE),
      collapse = " "
    )

    samp_sentiments <- tidy(dfm(release_sample)) %>%
      inner_join(bing_lexicon, by = c(term = "word"))

    # Sum the token counts per polarity directly. A resample that contains
    # only positive, only negative, or no lexicon words then scores the
    # missing polarity as 0 instead of failing on an absent column.
    is_positive <- samp_sentiments$sentiment == "positive"
    is_negative <- samp_sentiments$sentiment == "negative"
    sentiment_list[i] <- sum(samp_sentiments$count[is_positive]) -
      sum(samp_sentiments$count[is_negative])

    cat("done",i,"of",nboot,"resamples\n")
  }

  sentiment_list
}

## results of sentiment analysis

# Leak coverage: bootstrap scores, then 2.5% bound / mean / 97.5% bound.
set.seed(12345)
boot_leak_sentiment_raw <- boot_leak_hist(document = leak_cov_corpus)

leak_lower_sentiment <- quantile(boot_leak_sentiment_raw, probs = 0.025)
leak_upper_sentiment <- quantile(boot_leak_sentiment_raw, probs = 0.975)
leak_mean_sentiment <- mean(boot_leak_sentiment_raw)

leak_sentiment_CIs <- c(
  leak_lower_sentiment,
  leak_mean_sentiment,
  leak_upper_sentiment
)


# Official-release coverage: same seed and same summary statistics, plus a
# quick base-graphics histogram of the raw bootstrap scores.
set.seed(12345)
boot_release_sentiment_raw <- boot_release_hist(document = release_cov_corpus)
hist(boot_release_sentiment_raw)

release_lower_sentiment <- quantile(boot_release_sentiment_raw, probs = 0.025)
release_upper_sentiment <- quantile(boot_release_sentiment_raw, probs = 0.975)
release_mean_sentiment <- mean(boot_release_sentiment_raw)

release_sentiment_CIs <- c(
  release_lower_sentiment,
  release_mean_sentiment,
  release_upper_sentiment
)

# Show both intervals (lower bound, mean, upper bound).
leak_sentiment_CIs
release_sentiment_CIs

# plot
library(ggplot2)

# Put the two bootstrap score vectors side by side, one column per coverage
# type. cbind() assumes both vectors have the same length.
sentiment_data <- data.frame(cbind(boot_leak_sentiment_raw,boot_release_sentiment_raw))
colnames(sentiment_data)<- c("leak sentiment", "official release sentiment")

## Leak coverage histogram

leakbarfill <- 'grey50'
leakbarline <- 'grey25'

# The column is referenced by name inside aes() so the mapping is resolved
# against `data`; ggplot2 discourages `sentiment_data$...` inside aes().
# `breaks` alone fixes the bins (it overrides `binwidth`), so `binwidth` is
# not passed.
leak_hist_plot <- ggplot(data=sentiment_data, aes(x = `leak sentiment`)) +
  geom_histogram(aes(y = ..density..),
                 breaks=seq(-20, 30, by = 2),
                 col= leakbarline,
                 fill= leakbarfill,
                 alpha = .75) +
  theme_bw() +
  theme(axis.line = element_line(colour = "black"),
        panel.border = element_blank(),
        panel.background = element_blank()) +
  geom_density(col="black") +
  labs(title = "Probability density plot for leak sentiment") +
  labs(x="Sentiment of sample text", y="Probability density") +
  geom_vline(xintercept = 0, size = 1, colour = "black",
             linetype = "dashed") +
  scale_y_continuous(limits = c(0, 0.08))

# The plot is built before the device is opened and printed explicitly, so
# the PDF is also written when the script is run through source(), where
# top-level values are not auto-printed.
pdf('cov_leak_hist.pdf')
print(leak_hist_plot)
dev.off()

## Official release coverage histogram

relbarfill <- "grey75"
relbarlines <- "grey25"

# Same layout as the leak plot. The former `ylim` argument to
# geom_histogram() was not a recognised parameter and was ignored with a
# warning; the y range is set by scale_y_continuous() below.
release_hist_plot <- ggplot(data=sentiment_data, aes(x = `official release sentiment`)) +
  geom_histogram(aes(y = ..density..),
                 breaks=seq(-20, 30, by = 2),
                 col=relbarlines,
                 fill=relbarfill,
                 alpha = .75)  +
  theme_bw() +
  theme(axis.line = element_line(colour = "black"),
        panel.border = element_blank(),
        panel.background = element_blank()) +
  geom_density(col="black") +
  labs(title = "Probability density plot for official release sentiment") +
  labs(x="Sentiment of sample text", y="Probability density") +
  geom_vline(xintercept = 0, size = 1, colour = "black",
             linetype = "dashed") +
  scale_y_continuous(limits = c(0, 0.08))

pdf('cov_release_hist.pdf')
print(release_hist_plot)
dev.off()

# Two-sample t-test (t.test() defaults) comparing the bootstrap sentiment
# scores of official-release coverage (x) with those of leak coverage (y).
ttable <- t.test(
  x = boot_release_sentiment_raw,
  y = boot_leak_sentiment_raw
)
ttable

#### ENDS